Libraries
# Chunk defaults for the report: echo the code, hide warnings and messages.
# NOTE(review): with warning = FALSE, warnings raised by later chunks are not
# shown in the rendered output.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
# Packages attached for the analysis below.
library(ggplot2)
library(tidyr)
library(dplyr)
library(tibble)
library(plotly)
library(ggcorrplot)
# Fix the RNG seed (presumably for reproducibility; no random call is visible
# in this section - confirm whether it is still needed).
set.seed(1)
Import data
# Load the battery dataset from CSV and discard every row that contains a
# missing value.
data <- drop_na(
  read.csv("data/mp_batteries.csv", header = TRUE, sep = ",")
)

# Preview the first ten rows as a formatted table.
data %>%
  head(10) %>%
  knitr::kable()
| mp-30_Al |
Al0-2Cu |
Al |
Cu |
Al2Cu |
3.0433992 |
0.0890331 |
1368.48055 |
5562.7901 |
121.840086 |
495.272533 |
0.0000000 |
0.6666667 |
0.0000000 |
0.0000000 |
1 |
0 |
| mp-1022721_Al |
Al1-3Cu |
Al |
AlCu |
Al3Cu |
1.2436528 |
-0.0215863 |
1112.93655 |
4418.9798 |
-24.024232 |
-95.389622 |
0.5000000 |
0.7500000 |
0.0740612 |
0.0962458 |
1 |
0 |
| mp-8637_Al |
Al0-5Mo |
Al |
Mo |
Al5Mo |
4.7625743 |
0.1227568 |
1741.50416 |
7175.7017 |
213.781556 |
880.866507 |
0.0000000 |
0.8333333 |
0.4114601 |
0.0452120 |
1 |
0 |
| mp-129_Al |
Al0-12Mo |
Al |
Mo |
Al12Mo |
12.7238931 |
0.0431214 |
2298.81076 |
7346.2323 |
99.128013 |
316.780060 |
0.0000000 |
0.9230769 |
0.0000000 |
0.0114456 |
1 |
0 |
| mp-91_Al |
Al0-12W |
Al |
W |
Al12W |
12.4945977 |
0.0292342 |
1900.74513 |
7332.7186 |
55.566774 |
214.366205 |
0.0000000 |
0.9230769 |
0.0000000 |
0.0000000 |
1 |
0 |
| mp-1055908_Al |
Al0-12Mn |
Al |
Mn |
MnAl12 |
18.2361563 |
0.0397314 |
2547.69280 |
7592.9161 |
101.223298 |
301.676876 |
0.0000000 |
0.9230769 |
0.1454643 |
0.0000000 |
1 |
0 |
| mp-2658_Al |
Al0-1Fe |
Al |
Fe |
AlFe |
0.7711539 |
0.4717287 |
970.75702 |
5622.3562 |
457.933974 |
2652.226958 |
0.0000000 |
0.5000000 |
0.7613994 |
0.0000000 |
1 |
0 |
| mp-16722_Al |
Al1-10.25V |
Al |
Al10V |
Al41V4 |
0.0027108 |
-0.0155827 |
61.37701 |
176.4151 |
-0.956421 |
-2.749028 |
0.9090909 |
0.9111111 |
0.0118097 |
0.0125861 |
1 |
0 |
| mp-998981_Al |
Al1-3Ti |
Al |
TiAl |
TiAl3 |
0.9562924 |
0.1602450 |
1248.40362 |
4248.4211 |
200.050419 |
680.788169 |
0.5000000 |
0.7500000 |
0.1415912 |
0.0244962 |
1 |
0 |
| mp-8633_K |
K0-3Cr |
K |
Cr |
K3Cr |
15.8029363 |
-0.7487069 |
474.94813 |
667.5593 |
-355.596958 |
-499.806269 |
0.0000000 |
0.7500000 |
0.4025263 |
0.6621618 |
1 |
0 |
- Battery.ID:
character - Unique battery
identifier
- Battery.Formula:
character - Chemical
formula of battery
- Working.Ion:
character - Primary
working ion
- Formula.Charge:
character - Formula in
charged state
- Formula.Discharge:
character - Formula
in discharged state
- Max.Delta.Volume:
numeric - Max volume
change (%)
- Average.Voltage:
numeric - Average
operating voltage
- Gravimetric.Capacity:
numeric -
Capacity per unit mass
- Volumetric.Capacity:
numeric -
Capacity per unit volume
- Gravimetric.Energy:
numeric - Energy
per unit mass
- Volumetric.Energy:
numeric - Energy
per unit volume
- Atomic.Fraction.Charge:
numeric -
Atomic fraction (charged)
- Atomic.Fraction.Discharge:
numeric -
Atomic fraction (discharged)
- Stability.Charge:
numeric - Stability
in charged state
- Stability.Discharge:
numeric -
Stability in discharged state
- Steps:
integer - Number of transition
steps
- Max.Voltage.Step:
numeric - Largest
voltage difference
Basic statistics
# Number of rows left after removing incomplete records.
summarise(data, n())
## n()
## 1 4351
# Summary statistics for every numeric column, rendered as a table with one
# row per variable and one column per statistic.
#
# The summarised names are built as "<variable>__<statistic>" and split on the
# double underscore. A single "_" separator is not safe here because the
# statistic name `n_unique` itself contains "_": it would be cut down to "n"
# and the extra piece discarded with a warning (hidden by warning = FALSE).
data %>%
  summarise(across(
    where(is.numeric),
    list(
      mean = mean,
      median = median,
      min = min,
      max = max,
      sd = sd,
      n_unique = n_distinct
    ),
    .names = "{.col}__{.fn}"
  )) %>%
  pivot_longer(
    everything(),
    # First piece is the source variable, second piece is the statistic.
    names_to = c("variable", "statistic"),
    names_sep = "__",
    values_to = "value"
  ) %>%
  pivot_wider(names_from = statistic, values_from = value) %>%
  knitr::kable()
| Max.Delta.Volume |
0.3753137 |
0.0420271 |
0.0000162 |
2.931932e+02 |
6.8518375 |
4342 |
| Average.Voltage |
3.0831427 |
3.3005818 |
-7.7547512 |
5.456883e+01 |
1.8220562 |
4351 |
| Gravimetric.Capacity |
158.2908894 |
130.6909797 |
5.1765430 |
2.557627e+03 |
164.9136411 |
3330 |
| Volumetric.Capacity |
610.6240987 |
507.0312049 |
24.0790699 |
7.619191e+03 |
563.8531258 |
4342 |
| Gravimetric.Energy |
444.1063802 |
401.7876573 |
-583.5458444 |
5.926950e+03 |
351.0481297 |
4351 |
| Volumetric.Energy |
1664.0484137 |
1463.7877150 |
-2208.0745659 |
1.830590e+04 |
1297.7985678 |
4351 |
| Atomic.Fraction.Charge |
0.0398558 |
0.0000000 |
0.0000000 |
9.090909e-01 |
0.0885604 |
126 |
| Atomic.Fraction.Discharge |
0.1590772 |
0.1428571 |
0.0074074 |
9.933333e-01 |
0.1203743 |
192 |
| Stability.Charge |
0.1425666 |
0.0731920 |
0.0000000 |
6.487098e+00 |
0.3782776 |
3050 |
| Stability.Discharge |
0.1220717 |
0.0487845 |
0.0000000 |
6.277809e+00 |
0.3523182 |
3933 |
| Steps |
1.1670880 |
1.0000000 |
1.0000000 |
6.000000e+00 |
0.4637496 |
6 |
| Max.Voltage.Step |
0.1502897 |
0.0000000 |
0.0000000 |
2.696069e+01 |
0.6300680 |
600 |
Data distributions
# Histogram of every numeric column, one facet per attribute.
# The data is reshaped to long form (attribute, value) so a single
# geom_histogram() call can draw all panels; pivot_longer() replaces the
# superseded gather() and matches the reshaping used elsewhere in this report.
data %>%
  select(where(is.numeric)) %>%
  pivot_longer(
    everything(),
    names_to = "attribute",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value)) +
  geom_histogram(bins = 30, fill = "lightblue", color = "black") +
  # Free scales: each attribute gets its own x and y range.
  facet_wrap(~attribute, scales = "free") +
  theme_minimal() +
  labs(title = "Distribution of values", x = "Value", y = "Frequency")

Correlations analysis
# Pairwise correlations between all numeric columns.
numeric_columns <- select(data, where(is.numeric))
cor_matrix <- cor(numeric_columns, use = "complete.obs")

# Reshape the square matrix to long form (Var1, Var2, Freq) so that each cell
# of the heatmap is one row.
cor_data <- cor_matrix %>%
  as.table() %>%
  as.data.frame()

# Interactive plot: heatmap with the colour range pinned to [-1, 1].
plot_ly(
  data = cor_data,
  type = "heatmap",
  x = ~Var1,
  y = ~Var2,
  z = ~Freq,
  zmin = -1,
  zmax = 1,
  colors = c("blue", "white", "red")
) %>%
  layout(
    title = "Correlation matrix",
    xaxis = list(title = ""),
    yaxis = list(title = "")
  )
Trends
# Scatter plot of gravimetric capacity against discharged atomic fraction,
# one panel per working ion, with a linear fit drawn over the points.
# NOTE(review): the explicit axis limits below presumably override
# scales = "free", giving every panel the same range - confirm which is wanted.
ggplot(
  data,
  aes(x = Gravimetric.Capacity, y = Atomic.Fraction.Discharge)
) +
  geom_point(color = "blue", alpha = 0.6) +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  facet_wrap(~ Working.Ion, scales = "free") +
  scale_x_continuous(limits = c(0, 3000)) +
  scale_y_continuous(limits = c(0, 1)) +
  theme_minimal() +
  labs(
    title = "Relationship between Gravimetric Capacity and Atomic Fraction Discharge",
    x = "Gravimetric Capacity",
    y = "Atomic Fraction Discharge"
  )
